################################################################################
# SCRIPT: Prepare survey data from CIS
#
# NOTES: Surveys used:
#        (i)  Language use in bilingual communities - MD2052, MD2296
#        (ii) Social and political situation in the BC - MD2096, MD2282, MD2407, MD2593
################################################################################

################################################################################
##### Packages and directories #####
################################################################################

## Add your own root directory
#setwd("Replication package/Survey")


## Workspace
## The global environment is intentionally left untouched. `rm(list = ls())`
## does not produce a fresh session (attached packages and options survive) and
## it deletes the objects of whoever source()s this file; restart R to get a
## clean run. Every object used below is created by this script itself.

## Packages
library(haven)
library(tidyverse)
library(labelled)

## Input directory (keep the trailing slash: paths are built with paste0())
indir <- "Data/"

## Output directory
## NOTE(review): not referenced in the code shown in this file -- the .RData
## files below are written to `indir`. Confirm whether that is intended.
outdir <- "Output/"


##### NOTES #####
## Some survey files were provided in SPSS format
## They were opened using SPSS, then exported in .csv format

################################################################################
##### Surveys on language use #####
################################################################################

##### MD2052 (1993) #####

## Basque Country file of study 2052: read the SPSS file, replace the original
## column codes with descriptive names, and stamp each row with the survey year.
## Columns that are not listed in rename() keep their original names.
d <- paste0(indir, "2052Pvas.sav") %>%
  read_sav(encoding = "UTF-8") %>%
  rename(
    # provinces of origin, residence, language knowledge and use
    province_birth = P1,
    province_father = P2,
    province_mother = P3,
    lived_bc = P4,
    spanish_knowledge = P7,
    euskera_knowledge = P8,
    language_parental = P10,
    province_spouse = P28A,
    language_couple = P28C,
    sons = P29,
    language_with_sons = P29D,
    # demographics
    age = P30B01,
    gender = P30C01,
    # "basqueif_*" battery
    basqueif_liveandwork = P3701,
    basqueif_speakeuskera = P3702,
    basqueif_basquefam = P3703,
    basqueif_bornbc = P3704,
    basqueif_willing = P3705,
    basqueif_nationalist = P3706,
    # identity, "basquediff_*" items and ideology scales
    basque_id_5p = P42,
    basquediff_1st = P4301,
    basquediff_2nd = P4302,
    basquediff_3rd = P4303,
    basque_id_10p = P45,
    meaning_spain = P46,
    ideological_10p = P47,
    # socio-economic background and vote variables
    education = ESTUDIOS,
    religiosity = P58,
    income = P59,
    votosima = VOTOSIMA,
    votosimg = VOTOSIMG
  ) %>%
  # Survey year (constant for the whole wave)
  mutate(svy_year = 1993)


##### MD2296 (1998) #####

## Import data
## The first header of this .csv carries a UTF-8 byte-order mark (U+FEFF).
## Declaring the file encoding as "UTF-8-BOM" makes R drop the mark while
## reading, so the first column arrives as plain `CUES` on every platform.
## Reading with `encoding = "UTF-8"` instead leaves the mark glued to the
## header, and the mangled name that results (e.g. `X.U.FEFF.CUES`) depends on
## the OS, locale and R version -- a rename keyed on it fails elsewhere.
dd <- read.csv(paste0(indir, "MD2296.csv"), fileEncoding = "UTF-8-BOM")

## Rename variables
## (CUES needs no entry here: with the BOM stripped it is read under its own
## name.)
dd <- rename(dd,
             REG = CCAA,
             TAMANO = TAMUNI,
             province_birth = P1,
             province_father = P2,
             province_mother = P3,
             lived_bc = P4,
             spanish_knowledge = P7,
             euskera_knowledge = P8,
             language_parental = P10,
             province_spouse = P25B,
             language_couple = P25D,
             sons = P26,
             language_with_sons = P26D,
             gender = P47,
             age = P48,
             basqueif_liveandwork = P3101,
             basqueif_speakeuskera = P3102,
             basqueif_basquefam = P3103,
             basqueif_bornbc = P3104,
             basqueif_willing = P3105,
             basqueif_nationalist = P3106,
             basque_id_5p = P40,
             basque_id_10p = P42,
             meaning_spain = P43,
             ideological_10p = P44,
             education = ESTUDIOS,
             income = P56)
# missing in this wave: diff between BC and Spain,
#                      religiosity (different coding),
#                      vote + support (only vote)

## ID for the study (set by hand; the same value for every row)
dd$ESTU <- 2296

## Survey year
dd$svy_year <- 1998



##### Append the two surveys #####

## Columns shared by both language-use waves
vars_to_keep <- c("ESTU", "CUES", "REG", "PROV", "MUN", "TAMANO", "svy_year",
                  "province_birth", "province_father", "province_mother",
                  "lived_bc", "spanish_knowledge", "euskera_knowledge",
                  "language_parental", "province_spouse", "language_couple",
                  "sons", "language_with_sons", "gender", "age",
                  "basqueif_liveandwork", "basqueif_speakeuskera",
                  "basqueif_basquefam", "basqueif_bornbc", "basqueif_willing",
                  "basqueif_nationalist", "basque_id_5p", "basque_id_10p",
                  "meaning_spain", "ideological_10p", "education", "income")

## Second wave: shared columns only (all_of() errors if one is absent)
dd <- dd %>% select(all_of(vars_to_keep))

## First wave: shared columns plus the ones that exist only in that survey
vars_to_keep <- append(
  vars_to_keep,
  c("basquediff_1st", "basquediff_2nd", "basquediff_3rd",
    "religiosity", "votosima", "votosimg")
)
d <- d %>% select(all_of(vars_to_keep))

## Stack both waves; bind_rows() fills the first-wave-only columns with NA in
## the rows coming from the second wave
cis <- bind_rows(list(d, dd))

## Write to disk
## NOTE(review): the file goes to `indir`, not `outdir` -- confirm intended.
save(list = "cis", file = paste0(indir, "CIS_lang.RData"))

## Drop the working objects before the next section reuses the names
rm(list = c("d", "dd", "cis"))


################################################################################
##### Surveys on politics in the BC #####
################################################################################

##### MD 2096 (1994) #####

## Import data
## The first header of this .csv carries a UTF-8 byte-order mark (U+FEFF).
## Declaring the file encoding as "UTF-8-BOM" makes R drop the mark while
## reading, so the first column arrives as plain `ESTU` on every platform.
## Reading with `encoding = "UTF-8"` instead leaves the mark glued to the
## header, and the mangled name that results (e.g. `X.U.FEFF.ESTU`) depends on
## the OS, locale and R version -- a rename keyed on it fails elsewhere.
d <- read.csv(paste0(indir, "MD2096.csv"), fileEncoding = "UTF-8-BOM")

## Rename variables
## (ESTU needs no entry here: with the BOM stripped it is read under its own
## name.)
d <- rename(d,
            REG = CCAA,
            TAMANO = TAMUNI,
            voted_last_nat = P9,
            voted_last_aut = P40,
            ideological_10p = P38,
            basque_id_10p = P43,
            gender = P50,
            age = P51)

## Survey year
d$svy_year <- 1994

## Keep only relevant variables (all_of() errors if one is absent)
vars_to_keep <- c("ESTU", "CUES", "REG", "PROV", "MUN",
                  "voted_last_nat", "voted_last_aut", "ideological_10p", "basque_id_10p",
                  "gender", "age", "ESTATUS", "ESTUDIOS", "svy_year")

d <- select(d, all_of(vars_to_keep))




##### MD 2282 (1998) #####

## Import data
## The first header of this .csv carries a UTF-8 byte-order mark (U+FEFF).
## Declaring the file encoding as "UTF-8-BOM" makes R drop the mark while
## reading, so the first column arrives as plain `CUES` on every platform.
## Reading with `encoding = "UTF-8"` instead leaves the mark glued to the
## header, and the mangled name that results (e.g. `X.U.FEFF.CUES`) depends on
## the OS, locale and R version -- a rename keyed on it fails elsewhere.
dd <- read.csv(paste0(indir, "MD2282.csv"), fileEncoding = "UTF-8-BOM")

## Rename variables
## (CUES needs no entry here: with the BOM stripped it is read under its own
## name.)
dd <- rename(dd,
             REG = CCAA,
             voted_last_nat = P22,
             voted_last_aut = P32,
             ideological_10p = P30,
             basque_id_10p = P29,
             gender = P33,
             age = P34)

## ID for the study (set by hand; the same value for every row)
dd$ESTU <- 2282

## Survey year
dd$svy_year <- 1998

## Keep only relevant variables (all_of() errors if one is absent)
vars_to_keep <- c("ESTU", "CUES", "REG", "PROV", "MUN",
                  "voted_last_nat", "voted_last_aut", "ideological_10p", "basque_id_10p",
                  "gender", "age", "ESTATUS", "ESTUDIOS","svy_year")

dd <- select(dd, all_of(vars_to_keep))



##### MD2407 (2001) #####

## Import data
## The first header of this .csv carries a UTF-8 byte-order mark (U+FEFF).
## Declaring the file encoding as "UTF-8-BOM" makes R drop the mark while
## reading, so the first column arrives as plain `CUES` on every platform.
## Reading with `encoding = "UTF-8"` instead leaves the mark glued to the
## header, and the mangled name that results (e.g. `X.U.FEFF.CUES`) depends on
## the OS, locale and R version -- a rename keyed on it fails elsewhere.
ddd <- read.csv(paste0(indir, "MD2407.csv"), fileEncoding = "UTF-8-BOM")

## Rename variables
## (CUES needs no entry here: with the BOM stripped it is read under its own
## name. No `voted_last_nat` is defined for this wave.)
ddd <- rename(ddd,
              REG = CCAA,
              voted_last_aut = P45,
              ideological_10p = P49,
              basque_id_10p = P12,
              gender = P50,
              age = P51)

## ID for the study (set by hand; the same value for every row)
ddd$ESTU <- 2407

## Survey year
ddd$svy_year <- 2001

## Keep only relevant variables (all_of() errors if one is absent)
vars_to_keep <- c("ESTU", "CUES", "REG", "PROV", "MUN",
                  "voted_last_aut", "ideological_10p", "basque_id_10p",
                  "gender", "age", "ESTATUS", "ESTUDIOS", "svy_year")

ddd <- select(ddd, all_of(vars_to_keep))



##### MD2593 (2005) #####

## Import data
## The first header of this .csv carries a UTF-8 byte-order mark (U+FEFF).
## Declaring the file encoding as "UTF-8-BOM" makes R drop the mark while
## reading, so the first column arrives as plain `CUES` on every platform.
## Reading with `encoding = "UTF-8"` instead leaves the mark glued to the
## header, and the mangled name that results (e.g. `X.U.FEFF.CUES`) depends on
## the OS, locale and R version -- a rename keyed on it fails elsewhere.
dddd <- read.csv(paste0(indir, "MD2593.csv"), fileEncoding = "UTF-8-BOM")

## Rename variables
## (CUES needs no entry here: with the BOM stripped it is read under its own
## name. The question columns of this wave are lower-case: p29, p52, ...)
dddd <- rename(dddd,
               REG = CCAA,
               voted_last_nat = p54,
               voted_last_aut = p52,
               ideological_10p = p53,
               basque_id_10p = p29,
               gender = p55,
               age = p56)

## ID for the study (set by hand; the same value for every row)
dddd$ESTU <- 2593

## Survey year
dddd$svy_year <- 2005

## Keep only the relevant variables (all_of() errors if one is absent)
vars_to_keep <- c("ESTU", "CUES", "REG", "PROV", "MUN",
                  "voted_last_nat", "voted_last_aut", "ideological_10p",
                  "basque_id_10p", "gender", "age", "ESTATUS", "ESTUDIOS", "svy_year")

dddd <- select(dddd, all_of(vars_to_keep))



## Stack the four politics waves into one data frame; bind_rows() matches
## columns by name and fills columns a wave does not have with NA
cis_pol <- bind_rows(list(d, dd, ddd, dddd))

## Write to disk
## NOTE(review): the file goes to `indir`, not `outdir` -- confirm intended.
save(list = "cis_pol", file = paste0(indir, "CIS_pol.RData"))


## Drop the per-wave objects (cis_pol itself stays in the workspace)
rm(list = c("d", "dd", "ddd", "dddd"))
